{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Looking in indexes: https://pypi.tuna.tsinghua.edu.cn/simple\n",
      "Collecting transformers\n",
      "  Downloading https://pypi.tuna.tsinghua.edu.cn/packages/15/fc/7b6dd7e1adc0a6407b845ed4be1999e98b6917d0694e57316d140cc85484/transformers-4.39.3-py3-none-any.whl (8.8 MB)\n",
      "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m8.8/8.8 MB\u001b[0m \u001b[31m2.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m00:01\u001b[0m00:01\u001b[0m\n",
      "\u001b[?25hCollecting modelscope\n",
      "  Downloading https://pypi.tuna.tsinghua.edu.cn/packages/83/9f/5a7802670bbd13e69d110032ba8aab0264dc42d82b4b7e87f4396647c0ae/modelscope-1.13.3-py3-none-any.whl (5.7 MB)\n",
      "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m5.7/5.7 MB\u001b[0m \u001b[31m1.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m00:01\u001b[0m00:01\u001b[0m\n",
      "\u001b[?25hRequirement already satisfied: filelock in /data/projects/py-life/.venv/lib/python3.12/site-packages (from transformers) (3.13.4)\n",
      "Collecting huggingface-hub<1.0,>=0.19.3 (from transformers)\n",
      "  Downloading https://pypi.tuna.tsinghua.edu.cn/packages/05/c0/779afbad8e75565c09ffa24a88b5dd7e293c92b74eb09df6435fc58ac986/huggingface_hub-0.22.2-py3-none-any.whl (388 kB)\n",
      "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m388.9/388.9 kB\u001b[0m \u001b[31m709.6 kB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m00:01\u001b[0m00:01\u001b[0m\n",
      "\u001b[?25hRequirement already satisfied: numpy>=1.17 in /data/projects/py-life/.venv/lib/python3.12/site-packages (from transformers) (1.26.4)\n",
      "Requirement already satisfied: packaging>=20.0 in /data/projects/py-life/.venv/lib/python3.12/site-packages (from transformers) (24.0)\n",
      "Collecting pyyaml>=5.1 (from transformers)\n",
      "  Downloading https://pypi.tuna.tsinghua.edu.cn/packages/b4/33/720548182ffa8344418126017aa1d4ab4aeec9a2275f04ce3f3573d8ace8/PyYAML-6.0.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (724 kB)\n",
      "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m725.0/725.0 kB\u001b[0m \u001b[31m1.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0ma \u001b[36m0:00:01\u001b[0m\n",
      "\u001b[?25hCollecting regex!=2019.12.17 (from transformers)\n",
      "  Downloading https://pypi.tuna.tsinghua.edu.cn/packages/fe/4e/242050c3ff38c08f16b31a5a338525def3f85b819fc0c5a97c35217098a7/regex-2023.12.25-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (789 kB)\n",
      "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m789.1/789.1 kB\u001b[0m \u001b[31m1.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m00:01\u001b[0m00:01\u001b[0m\n",
      "\u001b[?25hRequirement already satisfied: requests in /data/projects/py-life/.venv/lib/python3.12/site-packages (from transformers) (2.31.0)\n",
      "Collecting tokenizers<0.19,>=0.14 (from transformers)\n",
      "  Downloading https://pypi.tuna.tsinghua.edu.cn/packages/f6/de/3707df0c1d7bf55e6a4dba724700353bfee8e292fdd8ccfe93416549124d/tokenizers-0.15.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (3.6 MB)\n",
      "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m3.6/3.6 MB\u001b[0m \u001b[31m1.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m00:01\u001b[0m00:01\u001b[0m\n",
      "\u001b[?25hCollecting safetensors>=0.4.1 (from transformers)\n",
      "  Downloading https://pypi.tuna.tsinghua.edu.cn/packages/98/54/ba865423f14354b48634ab7e908ba27b8c6a37208f3ee630149c443d7680/safetensors-0.4.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.3 MB)\n",
      "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.3/1.3 MB\u001b[0m \u001b[31m1.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m00:01\u001b[0m00:01\u001b[0m\n",
      "\u001b[?25hRequirement already satisfied: tqdm>=4.27 in /data/projects/py-life/.venv/lib/python3.12/site-packages (from transformers) (4.64.1)\n",
      "Collecting addict (from modelscope)\n",
      "  Downloading https://pypi.tuna.tsinghua.edu.cn/packages/6a/00/b08f23b7d7e1e14ce01419a467b583edbb93c6cdb8654e54a9cc579cd61f/addict-2.4.0-py3-none-any.whl (3.8 kB)\n",
      "Requirement already satisfied: attrs in /data/projects/py-life/.venv/lib/python3.12/site-packages (from modelscope) (23.2.0)\n",
      "Collecting datasets>=2.14.5 (from modelscope)\n",
      "  Downloading https://pypi.tuna.tsinghua.edu.cn/packages/95/fc/661a7f06e8b7d48fcbd3f55423b7ff1ac3ce59526f146fda87a1e1788ee4/datasets-2.18.0-py3-none-any.whl (510 kB)\n",
      "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m510.5/510.5 kB\u001b[0m \u001b[31m1.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m00:01\u001b[0m00:01\u001b[0m\n",
      "\u001b[?25hCollecting einops (from modelscope)\n",
      "  Downloading https://pypi.tuna.tsinghua.edu.cn/packages/29/0b/2d1c0ebfd092e25935b86509a9a817159212d82aa43d7fb07eca4eeff2c2/einops-0.7.0-py3-none-any.whl (44 kB)\n",
      "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m44.6/44.6 kB\u001b[0m \u001b[31m1.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
      "\u001b[?25hCollecting gast>=0.2.2 (from modelscope)\n",
      "  Downloading https://pypi.tuna.tsinghua.edu.cn/packages/fa/39/5aae571e5a5f4de9c3445dae08a530498e5c53b0e74410eeeb0991c79047/gast-0.5.4-py3-none-any.whl (19 kB)\n",
      "Collecting oss2 (from modelscope)\n",
      "  Downloading https://pypi.tuna.tsinghua.edu.cn/packages/d5/63/b6c355af7f04a8a1d5759fa6fc47539e25ef8e6f2745372a242fdadcac65/oss2-2.18.4.tar.gz (278 kB)\n",
      "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m278.1/278.1 kB\u001b[0m \u001b[31m641.6 kB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0ma \u001b[36m0:00:01\u001b[0m\n",
      "\u001b[?25h  Installing build dependencies ... \u001b[?25ldone\n",
      "\u001b[?25h  Getting requirements to build wheel ... \u001b[?25ldone\n",
      "\u001b[?25h  Preparing metadata (pyproject.toml) ... \u001b[?25ldone\n",
      "\u001b[?25hRequirement already satisfied: pandas in /data/projects/py-life/.venv/lib/python3.12/site-packages (from modelscope) (2.2.2)\n",
      "Requirement already satisfied: Pillow>=6.2.0 in /data/projects/py-life/.venv/lib/python3.12/site-packages (from modelscope) (10.3.0)\n",
      "Collecting pyarrow!=9.0.0,>=6.0.0 (from modelscope)\n",
      "  Downloading https://pypi.tuna.tsinghua.edu.cn/packages/f5/87/6270d60494909a45beac5afcb49f67b6a2f19ea07e25d130c62ae4e02bdc/pyarrow-15.0.2-cp312-cp312-manylinux_2_28_x86_64.whl (38.3 MB)\n",
      "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m38.3/38.3 MB\u001b[0m \u001b[31m1.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m00:01\u001b[0m00:01\u001b[0m\n",
      "\u001b[?25hRequirement already satisfied: python-dateutil>=2.1 in /data/projects/py-life/.venv/lib/python3.12/site-packages (from modelscope) (2.9.0.post0)\n",
      "Requirement already satisfied: scipy in /data/projects/py-life/.venv/lib/python3.12/site-packages (from modelscope) (1.13.0)\n",
      "Collecting setuptools (from modelscope)\n",
      "  Using cached https://pypi.tuna.tsinghua.edu.cn/packages/92/e1/1c8bb3420105e70bdf357d57dd5567202b4ef8d27f810e98bb962d950834/setuptools-69.2.0-py3-none-any.whl (821 kB)\n",
      "Collecting simplejson>=3.3.0 (from modelscope)\n",
      "  Downloading https://pypi.tuna.tsinghua.edu.cn/packages/f2/6b/ce0090a8026acc8ed027ec9471a551e02dafb7973674773fb218824130f9/simplejson-3.19.2-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl (152 kB)\n",
      "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m152.4/152.4 kB\u001b[0m \u001b[31m1.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0ma \u001b[36m0:00:01\u001b[0m\n",
      "\u001b[?25hCollecting sortedcontainers>=1.5.9 (from modelscope)\n",
      "  Downloading https://pypi.tuna.tsinghua.edu.cn/packages/32/46/9cb0e58b2deb7f82b84065f37f3bffeb12413f947f9388e4cac22c4621ce/sortedcontainers-2.4.0-py2.py3-none-any.whl (29 kB)\n",
      "Requirement already satisfied: urllib3>=1.26 in /data/projects/py-life/.venv/lib/python3.12/site-packages (from modelscope) (2.2.1)\n",
      "Collecting yapf (from modelscope)\n",
      "  Downloading https://pypi.tuna.tsinghua.edu.cn/packages/66/c9/d4b03b2490107f13ebd68fe9496d41ae41a7de6275ead56d0d4621b11ffd/yapf-0.40.2-py3-none-any.whl (254 kB)\n",
      "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m254.7/254.7 kB\u001b[0m \u001b[31m2.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0ma \u001b[36m0:00:01\u001b[0m\n",
      "\u001b[?25hCollecting pyarrow-hotfix (from datasets>=2.14.5->modelscope)\n",
      "  Downloading https://pypi.tuna.tsinghua.edu.cn/packages/e4/f4/9ec2222f5f5f8ea04f66f184caafd991a39c8782e31f5b0266f101cb68ca/pyarrow_hotfix-0.6-py3-none-any.whl (7.9 kB)\n",
      "Collecting dill<0.3.9,>=0.3.0 (from datasets>=2.14.5->modelscope)\n",
      "  Downloading https://pypi.tuna.tsinghua.edu.cn/packages/c9/7a/cef76fd8438a42f96db64ddaa85280485a9c395e7df3db8158cfec1eee34/dill-0.3.8-py3-none-any.whl (116 kB)\n",
      "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m116.3/116.3 kB\u001b[0m \u001b[31m1.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0ma \u001b[36m0:00:01\u001b[0m\n",
      "\u001b[?25hCollecting xxhash (from datasets>=2.14.5->modelscope)\n",
      "  Downloading https://pypi.tuna.tsinghua.edu.cn/packages/ce/d4/8111e14273c0781349af8d0dae55c4e42c7196e7237e81a3db5186ab7dfe/xxhash-3.4.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (194 kB)\n",
      "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m194.3/194.3 kB\u001b[0m \u001b[31m1.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0ma \u001b[36m0:00:01\u001b[0m\n",
      "\u001b[?25hCollecting multiprocess (from datasets>=2.14.5->modelscope)\n",
      "  Downloading https://pypi.tuna.tsinghua.edu.cn/packages/0a/7d/a988f258104dcd2ccf1ed40fdc97e26c4ac351eeaf81d76e266c52d84e2f/multiprocess-0.70.16-py312-none-any.whl (146 kB)\n",
      "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m146.7/146.7 kB\u001b[0m \u001b[31m1.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0ma \u001b[36m0:00:01\u001b[0m\n",
      "\u001b[?25hCollecting fsspec[http]<=2024.2.0,>=2023.1.0 (from datasets>=2.14.5->modelscope)\n",
      "  Downloading https://pypi.tuna.tsinghua.edu.cn/packages/ad/30/2281c062222dc39328843bd1ddd30ff3005ef8e30b2fd09c4d2792766061/fsspec-2024.2.0-py3-none-any.whl (170 kB)\n",
      "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m170.9/170.9 kB\u001b[0m \u001b[31m1.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0ma \u001b[36m0:00:01\u001b[0m\n",
      "\u001b[?25hCollecting aiohttp (from datasets>=2.14.5->modelscope)\n",
      "  Downloading https://pypi.tuna.tsinghua.edu.cn/packages/fa/d1/13d90775af7115d97d6fa8815cc1704fabd24080e5a6de0a88ac250e8707/aiohttp-3.9.4-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.3 MB)\n",
      "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.3/1.3 MB\u001b[0m \u001b[31m1.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0ma \u001b[36m0:00:01\u001b[0m\n",
      "\u001b[?25hRequirement already satisfied: fsspec>=2023.5.0 in /data/projects/py-life/.venv/lib/python3.12/site-packages (from huggingface-hub<1.0,>=0.19.3->transformers) (2024.3.1)\n",
      "Requirement already satisfied: typing-extensions>=3.7.4.3 in /data/projects/py-life/.venv/lib/python3.12/site-packages (from huggingface-hub<1.0,>=0.19.3->transformers) (4.11.0)\n",
      "Requirement already satisfied: six>=1.5 in /data/projects/py-life/.venv/lib/python3.12/site-packages (from python-dateutil>=2.1->modelscope) (1.16.0)\n",
      "Requirement already satisfied: charset-normalizer<4,>=2 in /data/projects/py-life/.venv/lib/python3.12/site-packages (from requests->transformers) (3.3.2)\n",
      "Requirement already satisfied: idna<4,>=2.5 in /data/projects/py-life/.venv/lib/python3.12/site-packages (from requests->transformers) (3.7)\n",
      "Requirement already satisfied: certifi>=2017.4.17 in /data/projects/py-life/.venv/lib/python3.12/site-packages (from requests->transformers) (2024.2.2)\n",
      "Collecting crcmod>=1.7 (from oss2->modelscope)\n",
      "  Downloading https://pypi.tuna.tsinghua.edu.cn/packages/6b/b0/e595ce2a2527e169c3bcd6c33d2473c1918e0b7f6826a043ca1245dd4e5b/crcmod-1.7.tar.gz (89 kB)\n",
      "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m89.7/89.7 kB\u001b[0m \u001b[31m1.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0ma \u001b[36m0:00:01\u001b[0m\n",
      "\u001b[?25h  Installing build dependencies ... \u001b[?25ldone\n",
      "\u001b[?25h  Getting requirements to build wheel ... \u001b[?25ldone\n",
      "\u001b[?25h  Preparing metadata (pyproject.toml) ... \u001b[?25ldone\n",
      "\u001b[?25hCollecting pycryptodome>=3.4.7 (from oss2->modelscope)\n",
      "  Downloading https://pypi.tuna.tsinghua.edu.cn/packages/af/20/5f29ec45462360e7f61e8688af9fe4a0afae057edfabdada662e11bf97e7/pycryptodome-3.20.0-cp35-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (2.1 MB)\n",
      "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m2.1/2.1 MB\u001b[0m \u001b[31m1.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m00:01\u001b[0m00:01\u001b[0m\n",
      "\u001b[?25hCollecting aliyun-python-sdk-kms>=2.4.1 (from oss2->modelscope)\n",
      "  Downloading https://pypi.tuna.tsinghua.edu.cn/packages/3d/ea/d88e08bfc4a0aee0111f1f24c98b19107bc6783441e7e944907c77b2243d/aliyun_python_sdk_kms-2.16.2-py2.py3-none-any.whl (94 kB)\n",
      "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m94.0/94.0 kB\u001b[0m \u001b[31m1.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m00:01\u001b[0m\n",
      "\u001b[?25hCollecting aliyun-python-sdk-core>=2.13.12 (from oss2->modelscope)\n",
      "  Downloading https://pypi.tuna.tsinghua.edu.cn/packages/cf/0f/c191007d4a0c068725009489d7f928614151da938598b875568a6323cff2/aliyun-python-sdk-core-2.15.0.tar.gz (443 kB)\n",
      "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m443.1/443.1 kB\u001b[0m \u001b[31m1.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0ma \u001b[36m0:00:01\u001b[0m\n",
      "\u001b[?25h  Installing build dependencies ... \u001b[?25ldone\n",
      "\u001b[?25h  Getting requirements to build wheel ... \u001b[?25ldone\n",
      "\u001b[?25h  Preparing metadata (pyproject.toml) ... \u001b[?25ldone\n",
      "\u001b[?25hRequirement already satisfied: pytz>=2020.1 in /data/projects/py-life/.venv/lib/python3.12/site-packages (from pandas->modelscope) (2024.1)\n",
      "Requirement already satisfied: tzdata>=2022.7 in /data/projects/py-life/.venv/lib/python3.12/site-packages (from pandas->modelscope) (2024.1)\n",
      "Collecting importlib-metadata>=6.6.0 (from yapf->modelscope)\n",
      "  Downloading https://pypi.tuna.tsinghua.edu.cn/packages/2d/0a/679461c511447ffaf176567d5c496d1de27cbe34a87df6677d7171b2fbd4/importlib_metadata-7.1.0-py3-none-any.whl (24 kB)\n",
      "Requirement already satisfied: platformdirs>=3.5.1 in /data/projects/py-life/.venv/lib/python3.12/site-packages (from yapf->modelscope) (4.2.0)\n",
      "Requirement already satisfied: tomli>=2.0.1 in /data/projects/py-life/.venv/lib/python3.12/site-packages (from yapf->modelscope) (2.0.1)\n",
      "Collecting jmespath<1.0.0,>=0.9.3 (from aliyun-python-sdk-core>=2.13.12->oss2->modelscope)\n",
      "  Downloading https://pypi.tuna.tsinghua.edu.cn/packages/07/cb/5f001272b6faeb23c1c9e0acc04d48eaaf5c862c17709d20e3469c6e0139/jmespath-0.10.0-py2.py3-none-any.whl (24 kB)\n",
      "Collecting cryptography>=2.6.0 (from aliyun-python-sdk-core>=2.13.12->oss2->modelscope)\n",
      "  Downloading https://pypi.tuna.tsinghua.edu.cn/packages/48/c8/c0962598c43d3cff2c9d6ac66d0c612bdfb1975be8d87b8889960cf8c81d/cryptography-42.0.5-cp39-abi3-manylinux_2_28_x86_64.whl (4.6 MB)\n",
      "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m4.6/4.6 MB\u001b[0m \u001b[31m972.9 kB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m00:01\u001b[0m00:01\u001b[0m\n",
      "\u001b[?25hCollecting aiosignal>=1.1.2 (from aiohttp->datasets>=2.14.5->modelscope)\n",
      "  Downloading https://pypi.tuna.tsinghua.edu.cn/packages/76/ac/a7305707cb852b7e16ff80eaf5692309bde30e2b1100a1fcacdc8f731d97/aiosignal-1.3.1-py3-none-any.whl (7.6 kB)\n",
      "Collecting frozenlist>=1.1.1 (from aiohttp->datasets>=2.14.5->modelscope)\n",
      "  Downloading https://pypi.tuna.tsinghua.edu.cn/packages/0b/f2/b8158a0f06faefec33f4dff6345a575c18095a44e52d4f10c678c137d0e0/frozenlist-1.4.1-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl (281 kB)\n",
      "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m281.5/281.5 kB\u001b[0m \u001b[31m1.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0ma \u001b[36m0:00:01\u001b[0m\n",
      "\u001b[?25hCollecting multidict<7.0,>=4.5 (from aiohttp->datasets>=2.14.5->modelscope)\n",
      "  Downloading https://pypi.tuna.tsinghua.edu.cn/packages/24/1f/af976383b0b772dd351210af5b60ff9927e3abb2f4a103e93da19a957da0/multidict-6.0.5-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (130 kB)\n",
      "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m130.8/130.8 kB\u001b[0m \u001b[31m2.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0ma \u001b[36m0:00:01\u001b[0m\n",
      "\u001b[?25hCollecting yarl<2.0,>=1.0 (from aiohttp->datasets>=2.14.5->modelscope)\n",
      "  Downloading https://pypi.tuna.tsinghua.edu.cn/packages/28/1c/bdb3411467b805737dd2720b85fd082e49f59bf0cc12dc1dfcc80ab3d274/yarl-1.9.4-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (322 kB)\n",
      "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m322.4/322.4 kB\u001b[0m \u001b[31m1.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0ma \u001b[36m0:00:01\u001b[0m\n",
      "\u001b[?25hCollecting zipp>=0.5 (from importlib-metadata>=6.6.0->yapf->modelscope)\n",
      "  Downloading https://pypi.tuna.tsinghua.edu.cn/packages/c2/0a/ba9d0ee9536d3ef73a3448e931776e658b36f128d344e175bc32b092a8bf/zipp-3.18.1-py3-none-any.whl (8.2 kB)\n",
      "Collecting cffi>=1.12 (from cryptography>=2.6.0->aliyun-python-sdk-core>=2.13.12->oss2->modelscope)\n",
      "  Downloading https://pypi.tuna.tsinghua.edu.cn/packages/09/d4/8759cc3b2222c159add8ce3af0089912203a31610f4be4c36f98e320b4c6/cffi-1.16.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (477 kB)\n",
      "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m477.6/477.6 kB\u001b[0m \u001b[31m1.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0ma \u001b[36m0:00:01\u001b[0m\n",
      "\u001b[?25hCollecting pycparser (from cffi>=1.12->cryptography>=2.6.0->aliyun-python-sdk-core>=2.13.12->oss2->modelscope)\n",
      "  Downloading https://pypi.tuna.tsinghua.edu.cn/packages/13/a3/a812df4e2dd5696d1f351d58b8fe16a405b234ad2886a0dab9183fb78109/pycparser-2.22-py3-none-any.whl (117 kB)\n",
      "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m117.6/117.6 kB\u001b[0m \u001b[31m2.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0ma \u001b[36m0:00:01\u001b[0m\n",
      "\u001b[?25hBuilding wheels for collected packages: oss2, aliyun-python-sdk-core, crcmod\n",
      "  Building wheel for oss2 (pyproject.toml) ... \u001b[?25ldone\n",
      "\u001b[?25h  Created wheel for oss2: filename=oss2-2.18.4-py3-none-any.whl size=115939 sha256=b2e8ee8d63cd96e976aa30fc512dd9fa6e64317dae1dd4ebe4f529a7cec34e79\n",
      "  Stored in directory: /home/kevin/.cache/pip/wheels/92/40/4c/60fc7bf5ba7f9854cb07d5182789f44cda18427ff43fc0ebb5\n",
      "  Building wheel for aliyun-python-sdk-core (pyproject.toml) ... \u001b[?25ldone\n",
      "\u001b[?25h  Created wheel for aliyun-python-sdk-core: filename=aliyun_python_sdk_core-2.15.0-py3-none-any.whl size=535311 sha256=82b32771e7f9ba51c2c6a280d0ce676c39d292ebe3095c69198121de3fedc47e\n",
      "  Stored in directory: /home/kevin/.cache/pip/wheels/e8/76/2e/c0507e86db3948025fcd1a62e95fb01423df952af5d4af8f5a\n",
      "  Building wheel for crcmod (pyproject.toml) ... \u001b[?25ldone\n",
      "\u001b[?25h  Created wheel for crcmod: filename=crcmod-1.7-cp312-cp312-linux_x86_64.whl size=23525 sha256=25e9a33ee2c118960f2b7e67f829930b5e7aaff8dc8d4d14427df9d7b5fb47a5\n",
      "  Stored in directory: /home/kevin/.cache/pip/wheels/8a/76/13/662de3a16f9e9356e2d443cce4635de07f200629df084d4fef\n",
      "Successfully built oss2 aliyun-python-sdk-core crcmod\n",
      "Installing collected packages: sortedcontainers, crcmod, addict, zipp, xxhash, simplejson, setuptools, safetensors, regex, pyyaml, pycryptodome, pycparser, pyarrow-hotfix, pyarrow, multidict, jmespath, gast, fsspec, frozenlist, einops, dill, yarl, multiprocess, importlib-metadata, huggingface-hub, cffi, aiosignal, yapf, tokenizers, cryptography, aiohttp, transformers, aliyun-python-sdk-core, datasets, aliyun-python-sdk-kms, oss2, modelscope\n",
      "  Attempting uninstall: fsspec\n",
      "    Found existing installation: fsspec 2024.3.1\n",
      "    Uninstalling fsspec-2024.3.1:\n",
      "      Successfully uninstalled fsspec-2024.3.1\n",
      "Successfully installed addict-2.4.0 aiohttp-3.9.4 aiosignal-1.3.1 aliyun-python-sdk-core-2.15.0 aliyun-python-sdk-kms-2.16.2 cffi-1.16.0 crcmod-1.7 cryptography-42.0.5 datasets-2.18.0 dill-0.3.8 einops-0.7.0 frozenlist-1.4.1 fsspec-2024.2.0 gast-0.5.4 huggingface-hub-0.22.2 importlib-metadata-7.1.0 jmespath-0.10.0 modelscope-1.13.3 multidict-6.0.5 multiprocess-0.70.16 oss2-2.18.4 pyarrow-15.0.2 pyarrow-hotfix-0.6 pycparser-2.22 pycryptodome-3.20.0 pyyaml-6.0.1 regex-2023.12.25 safetensors-0.4.2 setuptools-69.2.0 simplejson-3.19.2 sortedcontainers-2.4.0 tokenizers-0.15.2 transformers-4.39.3 xxhash-3.4.1 yapf-0.40.2 yarl-1.9.4 zipp-3.18.1\n",
      "\n",
      "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip is available: \u001b[0m\u001b[31;49m23.2.1\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m24.0\u001b[0m\n",
      "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpip install --upgrade pip\u001b[0m\n",
      "Note: you may need to restart the kernel to use updated packages.\n"
     ]
    }
   ],
   "source": [
    "%pip install transformers modelscope"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/data/projects/py-life/.venv/lib/python3.12/site-packages/tqdm/auto.py:22: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
      "  from .autonotebook import tqdm as notebook_tqdm\n",
      "2024-04-13 20:30:55,050 - modelscope - INFO - PyTorch version 2.2.2+cu121 Found.\n",
      "2024-04-13 20:30:55,051 - modelscope - INFO - Loading ast index from /home/kevin/.cache/modelscope/ast_indexer\n",
      "2024-04-13 20:30:55,052 - modelscope - INFO - No valid ast index found from /home/kevin/.cache/modelscope/ast_indexer, generating ast index from prebuilt!\n",
      "2024-04-13 20:30:55,105 - modelscope - INFO - Loading done! Current index file version is 1.13.3, with md5 f1d7984114568372d6beab0cc82cbe8b and a total number of 972 components indexed\n",
      "Downloading: 100%|██████████| 34.4k/34.4k [00:00<00:00, 1.50MB/s]\n",
      "Downloading: 100%|██████████| 500/500 [00:00<00:00, 780kB/s]\n",
      "Downloading: 100%|██████████| 69.5k/69.5k [00:00<00:00, 1.35MB/s]\n",
      "Downloading: 100%|██████████| 9.33k/9.33k [00:00<00:00, 9.10MB/s]\n",
      "Downloading: 100%|██████████| 673/673 [00:00<00:00, 1.44MB/s]\n",
      "Downloading: 100%|██████████| 73.0/73.0 [00:00<00:00, 328kB/s]\n",
      "Downloading: 100%|██████████| 137/137 [00:00<00:00, 596kB/s]\n",
      "Downloading: 100%|██████████| 24.4k/24.4k [00:00<00:00, 867kB/s]\n",
      "Downloading: 100%|█████████▉| 4.61G/4.61G [03:21<00:00, 24.5MB/s]\n",
      "Downloading: 100%|██████████| 64.0M/64.0M [00:11<00:00, 5.63MB/s]\n",
      "Downloading: 100%|██████████| 13.2k/13.2k [00:00<00:00, 631kB/s]\n",
      "Downloading: 100%|██████████| 429k/429k [00:00<00:00, 3.23MB/s]\n",
      "Downloading: 100%|██████████| 6.47k/6.47k [00:00<00:00, 8.66MB/s]\n",
      "Downloading: 100%|██████████| 957/957 [00:00<00:00, 747kB/s]\n",
      "Downloading: 100%|██████████| 16.7M/16.7M [00:01<00:00, 13.8MB/s]\n",
      "Downloading: 100%|██████████| 4.04M/4.04M [00:00<00:00, 13.4MB/s]\n",
      "Downloading: 100%|██████████| 6.20k/6.20k [00:00<00:00, 10.0MB/s]\n",
      "Downloading: 100%|██████████| 629k/629k [00:00<00:00, 4.28MB/s]\n",
      "Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.\n"
     ]
    },
    {
     "ename": "ImportError",
     "evalue": "Using `low_cpu_mem_usage=True` or a `device_map` requires Accelerate: `pip install accelerate`",
     "output_type": "error",
     "traceback": [
      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[0;31mImportError\u001b[0m                               Traceback (most recent call last)",
      "Cell \u001b[0;32mIn[1], line 23\u001b[0m\n\u001b[1;32m     21\u001b[0m model_dir \u001b[38;5;241m=\u001b[39m snapshot_download(model_id)\n\u001b[1;32m     22\u001b[0m tokenizer \u001b[38;5;241m=\u001b[39m AutoTokenizer\u001b[38;5;241m.\u001b[39mfrom_pretrained(model_dir)\n\u001b[0;32m---> 23\u001b[0m model \u001b[38;5;241m=\u001b[39m \u001b[43mGemmaForCausalLM\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mfrom_pretrained\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m     24\u001b[0m \u001b[43m    \u001b[49m\u001b[43mmodel_dir\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mtorch_dtype\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mtorch\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mbfloat16\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mdevice_map\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mauto\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\n\u001b[1;32m     25\u001b[0m \u001b[43m)\u001b[49m\n\u001b[1;32m     27\u001b[0m input_text \u001b[38;5;241m=\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mTake a selfie for me with front camera\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m     28\u001b[0m nexa_query \u001b[38;5;241m=\u001b[39m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mBelow is the query from the users, please call the correct function and generate the parameters to call the function.\u001b[39m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[38;5;124mQuery: \u001b[39m\u001b[38;5;132;01m{\u001b[39;00minput_text\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m \u001b[39m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[38;5;124mResponse:\u001b[39m\u001b[38;5;124m\"\u001b[39m\n",
      "File \u001b[0;32m/data/projects/py-life/.venv/lib/python3.12/site-packages/transformers/modeling_utils.py:2970\u001b[0m, in \u001b[0;36mPreTrainedModel.from_pretrained\u001b[0;34m(cls, pretrained_model_name_or_path, config, cache_dir, ignore_mismatched_sizes, force_download, local_files_only, token, revision, use_safetensors, *model_args, **kwargs)\u001b[0m\n\u001b[1;32m   2966\u001b[0m         \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\n\u001b[1;32m   2967\u001b[0m             \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mDeepSpeed Zero-3 is not compatible with `low_cpu_mem_usage=True` or with passing a `device_map`.\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m   2968\u001b[0m         )\n\u001b[1;32m   2969\u001b[0m     \u001b[38;5;28;01melif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m is_accelerate_available():\n\u001b[0;32m-> 2970\u001b[0m         \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mImportError\u001b[39;00m(\n\u001b[1;32m   2971\u001b[0m             \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mUsing `low_cpu_mem_usage=True` or a `device_map` requires Accelerate: `pip install accelerate`\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m   2972\u001b[0m         )\n\u001b[1;32m   2974\u001b[0m \u001b[38;5;66;03m# handling bnb config from kwargs, remove after `load_in_{4/8}bit` deprecation.\u001b[39;00m\n\u001b[1;32m   2975\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m load_in_4bit \u001b[38;5;129;01mor\u001b[39;00m load_in_8bit:\n",
      "\u001b[0;31mImportError\u001b[0m: Using `low_cpu_mem_usage=True` or a `device_map` requires Accelerate: `pip install accelerate`"
     ]
    }
   ],
   "source": [
    "from transformers import AutoTokenizer, GemmaForCausalLM\n",
    "from modelscope import snapshot_download\n",
    "\n",
    "import torch\n",
    "import time\n",
    "\n",
    "def inference(input_text):\n",
    "    start_time = time.time()\n",
    "    input_ids = tokenizer(input_text, return_tensors=\"pt\").to(model.device)\n",
    "    input_length = input_ids[\"input_ids\"].shape[1]\n",
    "    outputs = model.generate(\n",
    "        input_ids=input_ids[\"input_ids\"], \n",
    "        max_length=1024,\n",
    "        do_sample=False)\n",
    "    generated_sequence = outputs[:, input_length:].tolist()\n",
    "    res = tokenizer.decode(generated_sequence[0])\n",
    "    end_time = time.time()\n",
    "    return {\"output\": res, \"latency\": end_time - start_time}\n",
    "\n",
    "model_id = \"AI-ModelScope/Octopus-v2\"\n",
    "model_dir = snapshot_download(model_id)\n",
    "tokenizer = AutoTokenizer.from_pretrained(model_dir)\n",
    "model = GemmaForCausalLM.from_pretrained(\n",
    "    model_dir, torch_dtype=torch.bfloat16, device_map=\"auto\"\n",
    ")\n",
    "\n",
    "input_text = \"Take a selfie for me with front camera\"\n",
    "nexa_query = f\"Below is the query from the users, please call the correct function and generate the parameters to call the function.\\n\\nQuery: {input_text} \\n\\nResponse:\"\n",
    "start_time = time.time()\n",
    "print(\"nexa model result:\\n\", inference(nexa_query))\n",
    "print(\"latency:\", time.time() - start_time,\" s\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Looking in indexes: https://pypi.tuna.tsinghua.edu.cn/simple\n",
      "Collecting accelerate\n",
      "  Downloading https://pypi.tuna.tsinghua.edu.cn/packages/1b/e8/2fc7af3fa77ddac89a9c9b390d2d31d1db0612247ba2274009946959604e/accelerate-0.29.2-py3-none-any.whl (297 kB)\n",
      "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m297.4/297.4 kB\u001b[0m \u001b[31m1.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0ma \u001b[36m0:00:01\u001b[0m\n",
      "\u001b[?25hRequirement already satisfied: numpy>=1.17 in /data/projects/py-life/.venv/lib/python3.12/site-packages (from accelerate) (1.26.4)\n",
      "Requirement already satisfied: packaging>=20.0 in /data/projects/py-life/.venv/lib/python3.12/site-packages (from accelerate) (24.0)\n",
      "Requirement already satisfied: psutil in /data/projects/py-life/.venv/lib/python3.12/site-packages (from accelerate) (5.9.8)\n",
      "Requirement already satisfied: pyyaml in /data/projects/py-life/.venv/lib/python3.12/site-packages (from accelerate) (6.0.1)\n",
      "Requirement already satisfied: torch>=1.10.0 in /data/projects/py-life/.venv/lib/python3.12/site-packages (from accelerate) (2.2.2+cu121)\n",
      "Requirement already satisfied: huggingface-hub in /data/projects/py-life/.venv/lib/python3.12/site-packages (from accelerate) (0.22.2)\n",
      "Requirement already satisfied: safetensors>=0.3.1 in /data/projects/py-life/.venv/lib/python3.12/site-packages (from accelerate) (0.4.2)\n",
      "Requirement already satisfied: filelock in /data/projects/py-life/.venv/lib/python3.12/site-packages (from torch>=1.10.0->accelerate) (3.13.4)\n",
      "Requirement already satisfied: typing-extensions>=4.8.0 in /data/projects/py-life/.venv/lib/python3.12/site-packages (from torch>=1.10.0->accelerate) (4.11.0)\n",
      "Requirement already satisfied: sympy in /data/projects/py-life/.venv/lib/python3.12/site-packages (from torch>=1.10.0->accelerate) (1.12)\n",
      "Requirement already satisfied: networkx in /data/projects/py-life/.venv/lib/python3.12/site-packages (from torch>=1.10.0->accelerate) (3.3)\n",
      "Requirement already satisfied: jinja2 in /data/projects/py-life/.venv/lib/python3.12/site-packages (from torch>=1.10.0->accelerate) (3.1.3)\n",
      "Requirement already satisfied: fsspec in /data/projects/py-life/.venv/lib/python3.12/site-packages (from torch>=1.10.0->accelerate) (2024.2.0)\n",
      "Requirement already satisfied: nvidia-cuda-nvrtc-cu12==12.1.105 in /data/projects/py-life/.venv/lib/python3.12/site-packages (from torch>=1.10.0->accelerate) (12.1.105)\n",
      "Requirement already satisfied: nvidia-cuda-runtime-cu12==12.1.105 in /data/projects/py-life/.venv/lib/python3.12/site-packages (from torch>=1.10.0->accelerate) (12.1.105)\n",
      "Requirement already satisfied: nvidia-cuda-cupti-cu12==12.1.105 in /data/projects/py-life/.venv/lib/python3.12/site-packages (from torch>=1.10.0->accelerate) (12.1.105)\n",
      "Requirement already satisfied: nvidia-cudnn-cu12==8.9.2.26 in /data/projects/py-life/.venv/lib/python3.12/site-packages (from torch>=1.10.0->accelerate) (8.9.2.26)\n",
      "Requirement already satisfied: nvidia-cublas-cu12==12.1.3.1 in /data/projects/py-life/.venv/lib/python3.12/site-packages (from torch>=1.10.0->accelerate) (12.1.3.1)\n",
      "Requirement already satisfied: nvidia-cufft-cu12==11.0.2.54 in /data/projects/py-life/.venv/lib/python3.12/site-packages (from torch>=1.10.0->accelerate) (11.0.2.54)\n",
      "Requirement already satisfied: nvidia-curand-cu12==10.3.2.106 in /data/projects/py-life/.venv/lib/python3.12/site-packages (from torch>=1.10.0->accelerate) (10.3.2.106)\n",
      "Requirement already satisfied: nvidia-cusolver-cu12==11.4.5.107 in /data/projects/py-life/.venv/lib/python3.12/site-packages (from torch>=1.10.0->accelerate) (11.4.5.107)\n",
      "Requirement already satisfied: nvidia-cusparse-cu12==12.1.0.106 in /data/projects/py-life/.venv/lib/python3.12/site-packages (from torch>=1.10.0->accelerate) (12.1.0.106)\n",
      "Requirement already satisfied: nvidia-nccl-cu12==2.19.3 in /data/projects/py-life/.venv/lib/python3.12/site-packages (from torch>=1.10.0->accelerate) (2.19.3)\n",
      "Requirement already satisfied: nvidia-nvtx-cu12==12.1.105 in /data/projects/py-life/.venv/lib/python3.12/site-packages (from torch>=1.10.0->accelerate) (12.1.105)\n",
      "Requirement already satisfied: nvidia-nvjitlink-cu12 in /data/projects/py-life/.venv/lib/python3.12/site-packages (from nvidia-cusolver-cu12==11.4.5.107->torch>=1.10.0->accelerate) (12.4.127)\n",
      "Requirement already satisfied: requests in /data/projects/py-life/.venv/lib/python3.12/site-packages (from huggingface-hub->accelerate) (2.31.0)\n",
      "Requirement already satisfied: tqdm>=4.42.1 in /data/projects/py-life/.venv/lib/python3.12/site-packages (from huggingface-hub->accelerate) (4.64.1)\n",
      "Requirement already satisfied: MarkupSafe>=2.0 in /data/projects/py-life/.venv/lib/python3.12/site-packages (from jinja2->torch>=1.10.0->accelerate) (2.1.5)\n",
      "Requirement already satisfied: charset-normalizer<4,>=2 in /data/projects/py-life/.venv/lib/python3.12/site-packages (from requests->huggingface-hub->accelerate) (3.3.2)\n",
      "Requirement already satisfied: idna<4,>=2.5 in /data/projects/py-life/.venv/lib/python3.12/site-packages (from requests->huggingface-hub->accelerate) (3.7)\n",
      "Requirement already satisfied: urllib3<3,>=1.21.1 in /data/projects/py-life/.venv/lib/python3.12/site-packages (from requests->huggingface-hub->accelerate) (2.2.1)\n",
      "Requirement already satisfied: certifi>=2017.4.17 in /data/projects/py-life/.venv/lib/python3.12/site-packages (from requests->huggingface-hub->accelerate) (2024.2.2)\n",
      "Requirement already satisfied: mpmath>=0.19 in /data/projects/py-life/.venv/lib/python3.12/site-packages (from sympy->torch>=1.10.0->accelerate) (1.3.0)\n",
      "Installing collected packages: accelerate\n",
      "Successfully installed accelerate-0.29.2\n",
      "\n",
      "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip is available: \u001b[0m\u001b[31;49m23.2.1\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m24.0\u001b[0m\n",
      "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpip install --upgrade pip\u001b[0m\n",
      "Note: you may need to restart the kernel to use updated packages.\n"
     ]
    }
   ],
   "source": [
    "%pip install accelerate"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'/home/kevin/.cache/modelscope/hub/AI-ModelScope/Octopus-v2'"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "model_dir"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[device(type='cuda', index=0)]"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "gpus = [torch.device(f'cuda:{i}') for i in range(torch.cuda.device_count())]\n",
    "gpus"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 创建device_map\n",
    "device_map = {\n",
    "    \"transformer\": gpus[0],  # 将transformer部分放置在第一个GPU上\n",
    "    # 如果模型有其他子模块，也可以为它们指定设备\n",
    "    # \"lm_head\": gpus[1],  # 将lm_head部分放置在第二个GPU上\n",
    "}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.\n",
      "Loading checkpoint shards: 100%|██████████| 2/2 [00:03<00:00,  1.56s/it]\n",
      "WARNING:root:Some parameters are on the meta device device because they were offloaded to the cpu.\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "model GemmaModel(\n",
      "  (embed_tokens): Embedding(256022, 2048, padding_idx=0)\n",
      "  (layers): ModuleList(\n",
      "    (0-17): 18 x GemmaDecoderLayer(\n",
      "      (self_attn): GemmaSdpaAttention(\n",
      "        (q_proj): Linear(in_features=2048, out_features=2048, bias=False)\n",
      "        (k_proj): Linear(in_features=2048, out_features=256, bias=False)\n",
      "        (v_proj): Linear(in_features=2048, out_features=256, bias=False)\n",
      "        (o_proj): Linear(in_features=2048, out_features=2048, bias=False)\n",
      "        (rotary_emb): GemmaRotaryEmbedding()\n",
      "      )\n",
      "      (mlp): GemmaMLP(\n",
      "        (gate_proj): Linear(in_features=2048, out_features=16384, bias=False)\n",
      "        (up_proj): Linear(in_features=2048, out_features=16384, bias=False)\n",
      "        (down_proj): Linear(in_features=16384, out_features=2048, bias=False)\n",
      "        (act_fn): PytorchGELUTanh()\n",
      "      )\n",
      "      (input_layernorm): GemmaRMSNorm()\n",
      "      (post_attention_layernorm): GemmaRMSNorm()\n",
      "    )\n",
      "  )\n",
      "  (norm): GemmaRMSNorm()\n",
      ")\n",
      "lm_head Linear(in_features=2048, out_features=256022, bias=False)\n",
      "nexa model result:\n",
      " {'output': ' <nexa_0>(\\'front\\')<nexa_end>\\n\\nFunction description: \\ndef take_a_photo(camera):\\n    \"\"\"\\n    Captures a photo using the specified camera and resolution settings.\\n\\n    Parameters:\\n    - camera (str): Specifies the camera to use. Can be \\'front\\' or \\'back\\'. The default is \\'back\\'.\\n\\n    Returns:\\n    - str: The string contains the file path of the captured photo if successful, or an error message if not. Example: \\'/storage/emulated/0/Pictures/MyApp/IMG_20240310_123456.jpg\\'\\n    \"\"\"\\n<eos>', 'latency': 50.398438453674316}\n",
      "latency: 50.398685932159424  s\n"
     ]
    }
   ],
   "source": [
    "import accelerate\n",
    "from transformers import AutoTokenizer, GemmaForCausalLM\n",
    "\n",
    "import torch\n",
    "import time\n",
    "\n",
    "\n",
    "def inference(input_text):\n",
    "    start_time = time.time()\n",
    "    input_ids = tokenizer(input_text, return_tensors=\"pt\").to(model.device)\n",
    "    input_length = input_ids[\"input_ids\"].shape[1]\n",
    "    outputs = model.generate(\n",
    "        input_ids=input_ids[\"input_ids\"], max_length=1024, do_sample=False\n",
    "    )\n",
    "    generated_sequence = outputs[:, input_length:].tolist()\n",
    "    res = tokenizer.decode(generated_sequence[0])\n",
    "    end_time = time.time()\n",
    "    return {\"output\": res, \"latency\": end_time - start_time}\n",
    "\n",
    "\n",
    "model_dir = \"/home/kevin/.cache/modelscope/hub/AI-ModelScope/Octopus-v2\"\n",
    "\n",
    "tokenizer = AutoTokenizer.from_pretrained(model_dir)\n",
    "\n",
    "gpus = [torch.device(f\"cuda:{i}\") for i in range(torch.cuda.device_count())]\n",
    "device_map = {\n",
    "    \"transformer\": gpus[0],\n",
    "    \"lm_head\": gpus[0],\n",
    "}\n",
    "# model = GemmaForCausalLM.from_pretrained(\n",
    "#     model_dir, torch_dtype=torch.bfloat16, device_map=device_map\n",
    "# )\n",
    "model = GemmaForCausalLM.from_pretrained(\n",
    "    model_dir, torch_dtype=torch.bfloat16, device_map='auto'\n",
    ")\n",
    "for name, module in model.named_children():\n",
    "    print(name, module)\n",
    "model.eval()\n",
    "\n",
    "input_text = \"Take a selfie for me with front camera\"\n",
    "nexa_query = f\"Below is the query from the users, please call the correct function and generate the parameters to call the function.\\n\\nQuery: {input_text} \\n\\nResponse:\"\n",
    "start_time = time.time()\n",
    "print(\"nexa model result:\\n\", inference(nexa_query))\n",
    "print(\"latency:\", time.time() - start_time, \" s\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/data/projects/py-life/.venv/lib/python3.12/site-packages/tqdm/auto.py:22: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
      "  from .autonotebook import tqdm as notebook_tqdm\n",
      "Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.\n",
      "Gemma's activation function should be approximate GeLU and not exact GeLU.\n",
      "Changing the activation function to `gelu_pytorch_tanh`.if you want to use the legacy `gelu`, edit the `model.config` to set `hidden_activation=gelu`   instead of `hidden_act`. See https://github.com/huggingface/transformers/pull/29402 for more details.\n",
      "Loading checkpoint shards: 100%|██████████| 2/2 [00:02<00:00,  1.27s/it]\n",
      "WARNING:root:Some parameters are on the meta device device because they were offloaded to the cpu.\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "model GemmaModel(\n",
      "  (embed_tokens): Embedding(256022, 2048, padding_idx=0)\n",
      "  (layers): ModuleList(\n",
      "    (0-17): 18 x GemmaDecoderLayer(\n",
      "      (self_attn): GemmaSdpaAttention(\n",
      "        (q_proj): Linear(in_features=2048, out_features=2048, bias=False)\n",
      "        (k_proj): Linear(in_features=2048, out_features=256, bias=False)\n",
      "        (v_proj): Linear(in_features=2048, out_features=256, bias=False)\n",
      "        (o_proj): Linear(in_features=2048, out_features=2048, bias=False)\n",
      "        (rotary_emb): GemmaRotaryEmbedding()\n",
      "      )\n",
      "      (mlp): GemmaMLP(\n",
      "        (gate_proj): Linear(in_features=2048, out_features=16384, bias=False)\n",
      "        (up_proj): Linear(in_features=2048, out_features=16384, bias=False)\n",
      "        (down_proj): Linear(in_features=16384, out_features=2048, bias=False)\n",
      "        (act_fn): PytorchGELUTanh()\n",
      "      )\n",
      "      (input_layernorm): GemmaRMSNorm()\n",
      "      (post_attention_layernorm): GemmaRMSNorm()\n",
      "    )\n",
      "  )\n",
      "  (norm): GemmaRMSNorm()\n",
      ")\n",
      "lm_head Linear(in_features=2048, out_features=256022, bias=False)\n",
      "eval model\n",
      "nexa model result:\n",
      " {'output': ' <nexa_0>(\\'front\\')<nexa_end>\\n\\nFunction description: \\ndef take_a_photo(camera):\\n    \"\"\"\\n    Captures a photo using the specified camera and resolution settings.\\n\\n    Parameters:\\n    - camera (str): Specifies the camera to use. Can be \\'front\\' or \\'back\\'. The default is \\'back\\'.\\n\\n    Returns:\\n    - str: The string contains the file path of the captured photo if successful, or an error message if not. Example: \\'/storage/emulated/0/Pictures/MyApp/IMG_20240310_123456.jpg\\'\\n    \"\"\"\\n<eos>', 'latency': 49.434717655181885}\n",
      "latency: 49.434924840927124  s\n"
     ]
    }
   ],
   "source": [
    "import accelerate\n",
    "from transformers import AutoTokenizer, GemmaForCausalLM\n",
    "\n",
    "import torch\n",
    "import time\n",
    "\n",
    "\n",
    "def inference(input_text):\n",
    "    start_time = time.time()\n",
    "    input_ids = tokenizer(input_text, return_tensors=\"pt\").to(model.device)\n",
    "    input_length = input_ids[\"input_ids\"].shape[1]\n",
    "    outputs = model.generate(\n",
    "        input_ids=input_ids[\"input_ids\"], max_length=1024, do_sample=False\n",
    "    )\n",
    "    generated_sequence = outputs[:, input_length:].tolist()\n",
    "    res = tokenizer.decode(generated_sequence[0])\n",
    "    end_time = time.time()\n",
    "    return {\"output\": res, \"latency\": end_time - start_time}\n",
    "\n",
    "\n",
    "model_dir = \"/home/kevin/.cache/modelscope/hub/AI-ModelScope/Octopus-v2\"\n",
    "\n",
    "tokenizer = AutoTokenizer.from_pretrained(model_dir)\n",
    "\n",
    "model = GemmaForCausalLM.from_pretrained(\n",
    "    model_dir, torch_dtype=torch.bfloat16, device_map='auto'\n",
    ")\n",
    "for name, module in model.named_children():\n",
    "    print(name, module)\n",
    "print(\"eval model\")\n",
    "model.eval()\n",
    "\n",
    "input_text = \"Take a selfie for me with front camera\"\n",
    "nexa_query = f\"Below is the query from the users, please call the correct function and generate the parameters to call the function.\\n\\nQuery: {input_text} \\n\\nResponse:\"\n",
    "start_time = time.time()\n",
    "print(\"nexa model result:\\n\", inference(nexa_query))\n",
    "print(\"latency:\", time.time() - start_time, \" s\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "nexa model result:\n",
      " {'output': ' <nexa_1>(\\'trending news\\')<nexa_end>\\n\\nFunction description: \\ndef get_trending_news(query):\\n    \"\"\"\\n    Retrieves a collection of trending news articles relevant to a specified query.\\n\\n    Parameters:\\n    - query (str): Topic for news articles.\\n\\n    Returns:\\n    - list[str]: A list of strings, where each string represents a single news article. Each article representation includes the article\\'s title and its URL, allowing users to easily access the full article for detailed information.\\n    \"\"\"\\n<eos>', 'latency': 40.03187799453735}\n"
     ]
    }
   ],
   "source": [
    "input_text = \"Get trending news\"\n",
    "nexa_query = f\"Below is the query from the users, please call the correct function and generate the parameters to call the function.\\n\\nQuery: {input_text} \\n\\nResponse:\"\n",
    "start_time = time.time()\n",
    "print(\"nexa model result:\\n\", inference(nexa_query))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": ".venv",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.12.1"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
